import pandas as pd
import numpy as np
import re
import itertools
import matplotlib.pyplot as plt
import jieba.posseg as psg

# Load the review data produced by the sentiment-analysis step.
pos_data = pd.read_csv("my_data_pos.csv", encoding = 'utf-8')
neg_data = pd.read_csv("my_data_neg.csv", encoding = 'utf-8')


def worker(s):
    """Segment *s* with jieba's POS tagger and return a list of (word, flag) tuples.

    The input is coerced with ``str()`` first so non-string cells do not
    break ``psg.cut``.
    NOTE(review): a missing value (NaN) becomes the literal text "nan" and is
    tokenized like any other text -- confirm the inputs have no empty
    ``content`` cells, or drop them upstream.
    """
    return [(x.word, x.flag) for x in psg.cut(str(s))]


# Only the review text column is needed from here on.
pos_data = pos_data['content']
neg_data = neg_data['content']

# One list of (word, flag) pairs per review.
pos_word = pos_data.apply(worker)
neg_word = neg_data.apply(worker)

# Number of tokens in each review.
p_word = pos_word.apply(len)
n_word = neg_word.apply(len)

# Flatten the per-review lists into one list per corpus.
# chain.from_iterable is linear in the total token count, whereas the
# previous sum(series, []) re-copied the accumulated list on every row.
pos_word = list(itertools.chain.from_iterable(pos_word))
neg_word = list(itertools.chain.from_iterable(neg_word))

# Words
wordp = [x[0] for x in pos_word]
wordn = [x[0] for x in neg_word]
# Part-of-speech flags
naturep = [x[1] for x in pos_word]
naturen = [x[1] for x in neg_word]

# Build one (word, nature) data frame per corpus.
pos_result = pd.DataFrame({'word': wordp, 'nature': naturep})
neg_result = pd.DataFrame({'word': wordn, 'nature': naturen})

# Basic cleanup: drop tokens flagged 'x'
# (presumably jieba's tag for punctuation / non-word symbols -- TODO confirm).
pos_result = pos_result[pos_result['nature'] != 'x']
neg_result = neg_result[neg_result['nature'] != 'x']

pos_result.to_csv('pos_result.csv', index=False, encoding='utf-8')
neg_result.to_csv('neg_result.csv', index=False, encoding='utf-8')
